from docx import Document
import json
import re

# Extract (code, value) pairs from every table of a Word document and dump
# them to a text file as a JSON array of {"code": ..., "value": ...} objects.

path = 'file.docx'

doc = Document(path)

tables = doc.tables
print(len(tables))

# Matches any run of whitespace; used to strip all whitespace from cell text.
_WHITESPACE_RE = re.compile(r"\s+")

num = 0  # running count of data rows collected across all tables
data = []
for table in tables:
    for row in table.rows:
        cells = row.cells
        # A data row needs both a code cell and a value cell; rows with
        # fewer cells are skipped instead of raising IndexError.
        if len(cells) < 2:
            continue

        code = _WHITESPACE_RE.sub("", cells[0].text)
        value = _WHITESPACE_RE.sub("", cells[1].text)

        # Skip rows whose first cell is the literal '代码' (header rows).
        if code == '代码':
            continue

        num += 1
        # Echo only the first 19 collected rows as a quick sanity check.
        if num < 20:
            print(code, ',', value)
        data.append({"code": code, "value": value})

# ensure_ascii=False keeps non-ASCII characters readable in the output, so
# the file must be opened with an explicit UTF-8 encoding; relying on the
# locale default can raise UnicodeEncodeError or produce non-UTF-8 JSON.
json_data = json.dumps(data, ensure_ascii=False)
with open('example.txt', 'w', encoding='utf-8') as f:
    f.write(json_data)
    
